In [16]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
df= pd.read_csv('F:/OneDrive/MYW/data_science/airbnb-SH-listings.csv/listings-locations.csv')
import json
with open('F:/OneDrive/MYW/data_science/geojson-china/data-master/json/geo/china/province-city/shanghai.geojson', encoding= 'utf-8') as f:
    districts_map = json.load(f)
In [17]:
districts_map['features']
col = "aliceblue, antiquewhite, aqua, aquamarine, azure,beige, bisque, black, blanchedalmond, blue,blueviolet, brown, burlywood, cadetblue,chartreuse, chocolate".split(",")
nei = set(df['neighbourhood'].values.tolist())
k_v = {k:v for k,v in zip(nei,col)}
df['col'] = df['neighbourhood'].apply(lambda x:k_v[x])
In [18]:
fig = px.scatter_mapbox(df,
                        lon = 'longitude', 
                        lat = 'latitude',  

                        color ="col",
                        hover_name ='name',
                        hover_data = None,
                        # color_continuous_scale = px.colors.carto.Temps
                       )
In [19]:
fig.update_layout(
   mapbox =  {'accesstoken': 'pk.eyJ1IjoicGlnZ3lzcDExMDIiLCJhIjoiY2t4ajc1YzlhMHJvcjJ2cXdhb3I5c3JwMiJ9.8Y2WR2f5TrE5DqEyO-rt3g', 'center':{ 'lat':31.224361,'lon':121.469170}, 'zoom':8},
   margin = {'l':0, 'r':0, 't':0, 'b':0} )
geo = dict(
        scope = 'asia',
        showland = True,
        landcolor = 'rgb(212,212,212)',
        subunitcolor = 'rgb(255,255,255)',
        countrycolor = 'rgb(255,255,255)',
        showlakes = True,
        showcountries = True,
        resolution = 50,
        projection = dict(
            type = 'conic conformal',
            rotation_lon = -100
        ),
        lonaxis = dict(
            showgrid = True,
            gridwidth = 0.5,
            range = [120.8, 122],
            dtick = 0.1
        ),
        lataxis = dict(
            showgrid = True,
            gridwidth = 0.5,
            range = [30.5,32],
            dtick = 0.1)
    ),

fig.show()
In [2]:
pip install jieba
Requirement already satisfied: jieba in e:\anaconda\lib\site-packages (0.42.1)
Note: you may need to restart the kernel to use updated packages.
In [11]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from nltk import word_tokenize
from collections import Counter
import re
import jieba
from wordcloud import WordCloud
In [12]:
china = ""
english = ""

plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号
df= pd.read_csv('F:/OneDrive/MYW/data_science/airbnb-SH-listings.csv/listings-locations.csv')
df['neighbourhood'] = df['neighbourhood'].apply(lambda x:x.split("/")[0].strip())
df1 = df.groupby('neighbourhood')['host_id'].count()
plt.pie(x=df1.values,labels=df1.index.values)
# plt.show()
Out[12]:
([<matplotlib.patches.Wedge at 0x1c61963a9d0>,
  <matplotlib.patches.Wedge at 0x1c61963aeb0>,
  <matplotlib.patches.Wedge at 0x1c619648370>,
  <matplotlib.patches.Wedge at 0x1c6196487f0>,
  <matplotlib.patches.Wedge at 0x1c619648c70>,
  <matplotlib.patches.Wedge at 0x1c61964d130>,
  <matplotlib.patches.Wedge at 0x1c61964d5b0>,
  <matplotlib.patches.Wedge at 0x1c61964da30>,
  <matplotlib.patches.Wedge at 0x1c61964deb0>,
  <matplotlib.patches.Wedge at 0x1c619651370>,
  <matplotlib.patches.Wedge at 0x1c61a054970>,
  <matplotlib.patches.Wedge at 0x1c619651c40>,
  <matplotlib.patches.Wedge at 0x1c61961c100>,
  <matplotlib.patches.Wedge at 0x1c61961c580>,
  <matplotlib.patches.Wedge at 0x1c61961ca00>,
  <matplotlib.patches.Wedge at 0x1c61961ce80>],
 [Text(1.0962746501780334, 0.0904538079741836, '嘉定区'),
  Text(1.0789790127904655, 0.2140193681836125, '奉贤区'),
  Text(1.0548455475098957, 0.311930875192804, '宝山区'),
  Text(0.9312214730007253, 0.5855139351222647, '崇明区'),
  Text(0.5444574577351535, 0.9558065058978066, '徐汇区'),
  Text(0.20406727632042368, 1.0809054291357612, '普陀区'),
  Text(0.0720124213695824, 1.0976402922490092, '杨浦区'),
  Text(-0.1341477258602501, 1.0917895345012807, '松江区'),
  Text(-1.091737001258064, 0.13457458929549085, '浦东新区'),
  Text(-0.43463450461605924, -1.0104913890762024, '虹口区'),
  Text(-0.33172836941676404, -1.048788009525326, '金山区'),
  Text(-0.21239206368419905, -1.0793005194495031, '长宁区'),
  Text(0.0618330366225734, -1.0982607502692754, '闵行区'),
  Text(0.37316080364225124, -1.0347709962233524, '青浦区'),
  Text(0.6799353825369264, -0.864689467713331, '静安区'),
  Text(1.0301725734554428, -0.3856740448878954, '黄浦区')])
In [13]:
plt.savefig("pie.png",dpi=300)
plt.cla()
df2 = df.groupby("room_type")['host_id'].count()
p1 = plt.bar(x=df2.index,height=df2.values)
plt.bar_label(p1, label_type= edge)
plt.xlabel("房屋类型")
plt.ylabel("计数")
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-13-032d76723afb> in <module>
      3 df2 = df.groupby("room_type")['host_id'].count()
      4 p1 = plt.bar(x=df2.index,height=df2.values)
----> 5 plt.bar_label(p1, label_type= edge)
      6 plt.xlabel("房屋类型")
      7 plt.ylabel("计数")

AttributeError: module 'matplotlib.pyplot' has no attribute 'bar_label'
In [14]:
plt.savefig("bar.png",dpi=300)
plt.cla()
df3 = pd.read_csv("F:/OneDrive/MYW/data_science/airbnb-SH-listings.csv/listings.csv")
df4 = df3[['host_since','number_of_reviews']]
df4["host_since"] = pd.to_datetime(df4['host_since'])
df4['day_count'] = max(df4['host_since']) - df4['host_since']
print(df4['day_count'].dt.days)
plt.scatter(x=df4['day_count'].dt.days,y=df4['number_of_reviews'])
plt.xlabel("注册时间")
plt.ylabel("评论数")
0        4232.0
1        3790.0
2        3790.0
3        3790.0
4        3790.0
          ...  
28120     830.0
28121    2573.0
28122    2573.0
28123     547.0
28124       0.0
Name: day_count, Length: 28125, dtype: float64
<ipython-input-14-e20ed5bfd76c>:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4["host_since"] = pd.to_datetime(df4['host_since'])
<ipython-input-14-e20ed5bfd76c>:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['day_count'] = max(df4['host_since']) - df4['host_since']
Out[14]:
Text(0, 0.5, '评论数')
In [ ]: